package arkref.analysis;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang.StringEscapeUtils;
import arkref.data.Document;
import arkref.data.EntityGraph;
import arkref.data.Mention;
import arkref.data.Sentence;
import arkref.data.Word;
import arkref.data.EntityGraph.Entity;
import arkref.parsestuff.U;
import edu.stanford.nlp.trees.Tree;
/**
 * Writers for coreference output.
 *
 * <p>{@link #go} emits a simple (and admittedly hard-to-read) XML format:
 * a list of entities, each containing its referent mentions.
 * {@link #writeTaggedDocument} emits the document text, one sentence per
 * line, with inline {@code <mention>} tags around each mention span.
 *
 * <p>Both methods close the supplied writer before returning.
 */
public class WriteEntityXml {

    /**
     * Writes every entity of the graph, with its mentions, as XML.
     *
     * <p>For each mention the id and the id of its sentence are written as
     * attributes. When the mention has a parse node, the string form of that
     * node's yield is written XML-escaped inside a {@code <tokens>} element.
     *
     * @param eg the entity graph whose sorted entities are written
     * @param pw destination writer; it is closed when this method returns,
     *           whether normally or by exception
     * @throws FileNotFoundException declared in the signature; kept so
     *         existing callers compile unchanged
     */
    public static void go(EntityGraph eg, PrintWriter pw) throws FileNotFoundException {
        try {
            pw.print("<entities>\n");
            for (Entity e : eg.sortedEntities()) {
                pw.printf("<entity id=\"%s\">\n", e.id);
                for (Mention m : e.sortedMentions()) {
                    Sentence s = m.getSentence();
                    // The double space after "<mention" is kept so the output
                    // stays byte-identical to what earlier versions produced.
                    pw.printf(" <mention  id=\"%s\" sentence=\"%s\">\n", m.ID(), s.ID());
                    Tree node = m.node();
                    if (node != null) {
                        pw.printf(" <tokens>%s</tokens>\n",
                                StringEscapeUtils.escapeXml(node.yield().toString()));
                    }
                    pw.print(" </mention>\n");
                }
                pw.print("</entity>\n");
            }
            pw.print("</entities>\n");
        } finally {
            // Close (and thereby flush) even if a lookup above throws.
            pw.close();
        }
    }

    /**
     * Writes the document text with inline mention tags.
     *
     * <p>Each sentence is written on its own line as its leaf tokens separated
     * by single spaces. A {@code <mention mentionid=".." entityid="..">} tag is
     * opened before the first leaf of each mention's node and
     * {@code </mention>} is written after its last leaf. No enclosing
     * document or sentence elements are emitted. Mentions without a parse
     * node are skipped.
     *
     * <p>NOTE(review): token text and entity names are written without XML
     * escaping, unlike {@link #go}; confirm whether consumers expect raw text
     * before changing that.
     *
     * @param d  the document whose sentences and mentions are written
     * @param pw destination writer; it is closed when this method returns,
     *           whether normally or by exception
     * @throws FileNotFoundException declared in the signature; kept so
     *         existing callers compile unchanged
     */
    public static void writeTaggedDocument(Document d, PrintWriter pw) throws FileNotFoundException {
        try {
            EntityGraph eg = d.entGraph();

            // Resolve every mention's boundary leaves once, instead of calling
            // getLeaves() twice per mention for every token in the document.
            // The three lists are parallel and keep the order of d.mentions().
            List<Mention> spans = new ArrayList<Mention>();
            List<Tree> firstLeaves = new ArrayList<Tree>();
            List<Tree> lastLeaves = new ArrayList<Tree>();
            for (Mention m : d.mentions()) {
                Tree node = m.node();
                if (node == null) {
                    // Same guard as in go(): a mention may have no parse node.
                    continue;
                }
                List<Tree> leaves = node.getLeaves();
                if (leaves.isEmpty()) {
                    continue;
                }
                spans.add(m);
                firstLeaves.add(leaves.get(0));
                lastLeaves.add(leaves.get(leaves.size() - 1));
            }

            for (Sentence s : d.sentences()) {
                boolean firstWord = true;
                for (Tree leaf : s.rootNode().getLeaves()) {
                    if (!firstWord) {
                        pw.print(" ");
                    }
                    firstWord = false;

                    // Leaves are matched by identity: a mention boundary is
                    // the very same tree node object as the sentence leaf.
                    for (int i = 0; i < spans.size(); i++) {
                        if (firstLeaves.get(i) == leaf) {
                            Mention m = spans.get(i);
                            pw.printf("<mention mentionid=\"%d\" entityid=\"%s\">",
                                    m.ID(), eg.entName(m));
                        }
                    }

                    // print(), not printf(): the token is data, not a format
                    // string, and may legitimately contain '%'.
                    pw.print(leaf.yield().toString());

                    for (int i = 0; i < spans.size(); i++) {
                        if (lastLeaves.get(i) == leaf) {
                            pw.print("</mention>");
                        }
                    }
                }
                pw.print("\n");
            }
        } finally {
            // Close (and thereby flush) even if a lookup above throws.
            pw.close();
        }
    }
}